import numpy as np
import pandas as pd
import FinanceDataReader as fdr
7 7. Pandas Practice
7.1 Practice 1 : Magic formula investing
= fdr.StockListing('KRX')
krx 4] krx.iloc[:, :
Code | ISU_CD | Name | Market | |
---|---|---|---|---|
0 | 005930 | KR7005930003 | 삼성전자 | KOSPI |
1 | 000660 | KR7000660001 | SK하이닉스 | KOSPI |
2 | 373220 | KR7373220003 | LG에너지솔루션 | KOSPI |
3 | 005380 | KR7005380001 | 현대차 | KOSPI |
4 | 207940 | KR7207940008 | 삼성바이오로직스 | KOSPI |
... | ... | ... | ... | ... |
2805 | 002995 | KR7002991008 | 금호건설우 | KOSPI |
2806 | 266170 | KR7266170000 | 뿌리깊은나무들 | KONEX |
2807 | 217320 | KR7217320001 | 썬테크 | KONEX |
2808 | 245450 | KR7245450002 | 씨앤에스링크 | KONEX |
2809 | 308700 | KR7308700004 | 테크엔 | KONEX |
2810 rows × 4 columns
= pd.read_csv('investment_hw/PER_ROA.csv')
df 5, [0, 3, 5, 6, 7, 8, 9]] df.iloc[:
종목명 | 등락률 | 거래량 | 시가총액 | 영업이익 | PER | ROA | |
---|---|---|---|---|---|---|---|
0 | AJ네트웍스 | -1.48% | 27984 | 2177 | -213.0 | 4.14 | 2.18 |
1 | AJ렌터카 | -0.47% | 50886 | 2325 | 218.0 | 61.40 | 0.28 |
2 | AK홀딩스 | -1.99% | 15535 | 4564 | 2697.0 | 4.27 | 6.52 |
3 | ARIRANG 200 | -0.10% | 234206 | 7572 | NaN | NaN | NaN |
4 | ARIRANG 200동일가중 | -0.19% | 31 | 46 | NaN | NaN | NaN |
= df.loc [ df.isnull().sum(axis=1) == 0, : ] df
= per_val = df['PER']
per < 0 ] = np.nan
per_val[ per = per_val.rank( ascending=True, na_option='bottom')
per_rank per_rank.head()
0 35.0
1 519.0
2 38.0
50 222.0
51 408.0
Name: PER, dtype: float64
= roa_val = df['ROA']
roa< 0] = np.nan
roa_val[roa = roa_val.rank( ascending=False, na_option='bottom')
roa_rank roa_rank.head()
0 355.0
1 556.0
2 110.0
50 239.5
51 37.0
Name: ROA, dtype: float64
= per_rank + roa_rank
result = result.rank( ascending=True, na_option='bottom')
result_rank > 10 ] = 0
result_rank[ result_rank > 0 ] = 1
result_rank [ result_rank result_rank
0 0.0
1 0.0
2 0.0
50 0.0
51 0.0
...
1522 0.0
1523 0.0
1524 0.0
1526 0.0
1529 0.0
Length: 758, dtype: float64
sum() result_rank.
10.0
= df.loc[ result_rank > 0 , ['종목명', '시가총액']].copy()
mf_df mf_df
종목명 | 시가총액 | |
---|---|---|
96 | HDC | 7468 |
440 | SIMPAC | 1835 |
459 | SK하이닉스 | 541634 |
749 | 대한유화 | 7508 |
1040 | 세아제강지주 | 2044 |
1057 | 신대양제지 | 2756 |
1157 | 에쓰씨엔지니어링 | 354 |
1298 | 케이씨 | 1694 |
1443 | 한일홀딩스 | 2904 |
1513 | 효성 | 18353 |
= df.loc[ result_rank > 0, '종목명'].values
mf_stock_list mf_stock_list
array(['HDC', 'SIMPAC', 'SK하이닉스', '대한유화', '세아제강지주', '신대양제지', '에쓰씨엔지니어링',
'케이씨', '한일홀딩스', '효성'], dtype=object)
4].head() krx.iloc[:,:
Code | ISU_CD | Name | Market | |
---|---|---|---|---|
0 | 005930 | KR7005930003 | 삼성전자 | KOSPI |
1 | 000660 | KR7000660001 | SK하이닉스 | KOSPI |
2 | 373220 | KR7373220003 | LG에너지솔루션 | KOSPI |
3 | 005380 | KR7005380001 | 현대차 | KOSPI |
4 | 207940 | KR7207940008 | 삼성바이오로직스 | KOSPI |
'종목코드']='' mf_df[
for stock in mf_stock_list:
'종목명']==stock, '종목코드']=krx[krx['Name']==stock]['Code'].values
mf_df.loc[ mf_df[ mf_df
종목명 | 시가총액 | 종목코드 | |
---|---|---|---|
96 | HDC | 7468 | 012630 |
440 | SIMPAC | 1835 | 009160 |
459 | SK하이닉스 | 541634 | 000660 |
749 | 대한유화 | 7508 | 006650 |
1040 | 세아제강지주 | 2044 | 003030 |
1057 | 신대양제지 | 2756 | 016590 |
1157 | 에쓰씨엔지니어링 | 354 | 023960 |
1298 | 케이씨 | 1694 | 029460 |
1443 | 한일홀딩스 | 2904 | 003300 |
1513 | 효성 | 18353 | 004800 |
for x in mf_df['종목코드'].values :
= fdr.DataReader( x, '2019-01-01', '2019-12-31' )
df
= df.loc[df.index[-1], 'Close'] / df.loc[df.index[0], 'Close']-1
cum_ret '종목코드']==x, '수익률'] = cum_ret
mf_df.loc[mf_df[
= df['Close'].cummax()
historical_max = df['Close']/historical_max - 1.
daily_drawdown
= daily_drawdown.min()
MDD '종목코드']==x, '최대낙폭'] = MDD
mf_df.loc[mf_df[
'daily_rtn'] = df['Close'].pct_change( periods = 1 )
df[= df['daily_rtn'].std()*np.sqrt(252)
VOL '종목코드']==x, '변동성'] = VOL
mf_df.loc[mf_df[
= None
df '시가총액', ascending=False) mf_df.sort_values(
종목명 | 시가총액 | 종목코드 | 수익률 | 최대낙폭 | 변동성 | |
---|---|---|---|---|---|---|
459 | SK하이닉스 | 541634 | 000660 | 0.552805 | -0.228606 | 0.353011 |
1513 | 효성 | 18353 | 004800 | 0.649635 | -0.115124 | 0.300178 |
749 | 대한유화 | 7508 | 006650 | -0.203390 | -0.407609 | 0.346504 |
96 | HDC | 7468 | 012630 | -0.332326 | -0.504808 | 0.330864 |
1443 | 한일홀딩스 | 2904 | 003300 | -0.197424 | -0.363892 | 0.324106 |
1057 | 신대양제지 | 2756 | 016590 | 0.034783 | -0.388466 | 0.364506 |
1040 | 세아제강지주 | 2044 | 003030 | -0.008368 | -0.228353 | 0.240910 |
440 | SIMPAC | 1835 | 009160 | 0.206967 | -0.299566 | 0.407815 |
1298 | 케이씨 | 1694 | 029460 | 0.594017 | -0.369942 | 0.442983 |
1157 | 에쓰씨엔지니어링 | 354 | 023960 | -0.062092 | -0.479592 | 0.565059 |
7.2 Practice 2 : Mean reversion strategy
= pd.read_csv('investment_hw/SPY.csv')
df df.head()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 1993-01-29 | 43.96875 | 43.96875 | 43.75000 | 43.93750 | 26.706757 | 1003200 |
1 | 1993-02-01 | 43.96875 | 44.25000 | 43.96875 | 44.25000 | 26.896694 | 480500 |
2 | 1993-02-02 | 44.21875 | 44.37500 | 44.12500 | 44.34375 | 26.953669 | 201300 |
3 | 1993-02-03 | 44.40625 | 44.84375 | 44.37500 | 44.81250 | 27.238594 | 529400 |
4 | 1993-02-04 | 44.96875 | 45.09375 | 44.46875 | 45.00000 | 27.352570 | 531500 |
df.describe()
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
count | 6648.000000 | 6648.000000 | 6648.000000 | 6648.000000 | 6648.000000 | 6.648000e+03 |
mean | 133.762935 | 134.541071 | 132.893598 | 133.759854 | 110.399391 | 8.440122e+07 |
std | 59.488006 | 59.671285 | 59.277882 | 59.492056 | 64.113369 | 9.837713e+07 |
min | 43.343750 | 43.531250 | 42.812500 | 43.406250 | 26.383823 | 5.200000e+03 |
25% | 96.780937 | 97.735000 | 95.726562 | 96.921875 | 71.256485 | 6.966775e+06 |
50% | 124.433750 | 125.335938 | 123.500000 | 124.312500 | 93.641503 | 5.709990e+07 |
75% | 151.702503 | 152.514999 | 150.810624 | 151.791714 | 125.251474 | 1.229908e+08 |
max | 296.040009 | 296.309998 | 293.760010 | 295.859985 | 294.427979 | 8.710263e+08 |
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6648 entries, 0 to 6647
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 6648 non-null object
1 Open 6648 non-null float64
2 High 6648 non-null float64
3 Low 6648 non-null float64
4 Close 6648 non-null float64
5 Adj Close 6648 non-null float64
6 Volume 6648 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 363.7+ KB
'Date']=pd.to_datetime(df.Date)
df[ df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6648 entries, 0 to 6647
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 6648 non-null datetime64[ns]
1 Open 6648 non-null float64
2 High 6648 non-null float64
3 Low 6648 non-null float64
4 Close 6648 non-null float64
5 Adj Close 6648 non-null float64
6 Volume 6648 non-null int64
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 363.7 KB
= df.loc[:,['Date', 'Adj Close']].copy()
price_df price_df.head()
Date | Adj Close | |
---|---|---|
0 | 1993-01-29 | 26.706757 |
1 | 1993-02-01 | 26.896694 |
2 | 1993-02-02 | 26.953669 |
3 | 1993-02-03 | 27.238594 |
4 | 1993-02-04 | 27.352570 |
'Date'], inplace=True)
price_df.set_index([ price_df.head()
Adj Close | |
---|---|
Date | |
1993-01-29 | 26.706757 |
1993-02-01 | 26.896694 |
1993-02-02 | 26.953669 |
1993-02-03 | 27.238594 |
1993-02-04 | 27.352570 |
# 볼린저 밴드
# - 상단 밴드 : 중간밴드 + 2*20일 이동표준편차
# - 중간 밴드 : 20일 이동표준편차
# - 하단 밴드 : 중간밴드 - 2*20일 이동표준편차
= 20
n = 2
sigma def bollinger_band(price_df, n, sigma):
= price_df.copy()
bb 'center']= price_df['Adj Close'].rolling(n).mean()
bb['ub']= bb['center'] + sigma * price_df['Adj Close'].rolling(window=n).std()
bb['lb']= bb['center'] - sigma * price_df['Adj Close'].rolling(window=n).std()
bb[return bb
= bollinger_band(price_df, n, sigma)
bollinger bollinger.tail()
Adj Close | center | ub | lb | |
---|---|---|---|---|
Date | ||||
2019-06-18 | 290.984741 | 282.981668 | 293.213256 | 272.750080 |
2019-06-19 | 291.641541 | 283.307582 | 294.219075 | 272.396089 |
2019-06-20 | 294.427979 | 283.816605 | 295.809022 | 271.824187 |
2019-06-21 | 294.000000 | 284.477884 | 297.200319 | 271.755449 |
2019-06-24 | 293.640015 | 285.089319 | 298.355028 | 271.823610 |
= '2009-01-02'
base_date = bollinger.loc[base_date:]
sample sample.head()
Adj Close | center | ub | lb | |
---|---|---|---|---|
Date | ||||
2009-01-02 | 75.099487 | 71.378963 | 74.538229 | 68.219698 |
2009-01-05 | 75.010582 | 71.711677 | 74.931608 | 68.491746 |
2009-01-06 | 75.511505 | 71.964058 | 75.543401 | 68.384716 |
2009-01-07 | 73.249435 | 71.980327 | 75.580937 | 68.379718 |
2009-01-08 | 73.548378 | 72.071645 | 75.736733 | 68.406557 |
def create_trade_book(sample):
= sample[['Adj Close']].copy()
book 'trade'] = ''
book[return book
def tradings ( sample, book ):
for i in sample.index:
if sample.loc[i, 'Adj Close'] > sample.loc[i, 'ub']:
'trade']=''
book.loc[i, elif sample.loc[i, 'lb'] > sample.loc[i, 'Adj Close']:
if book.shift(1).loc[i, 'trade']=='buy':
'trade']='buy'
book.loc[i, else :
'trade']='buy'
book.loc[i, elif (sample.loc[i, 'ub']>= sample.loc[i, 'Adj Close'] and
'Adj Close']>=sample.loc[i, 'lb']):
sample.loc[i, if book.shift(1).loc[i, 'trade'] == 'buy':
'trade']='buy'
book.loc[i, else :
'trade']=''
book.loc[i, return book
= create_trade_book(sample)
book = tradings(sample, book)
book 10) book.tail(
Adj Close | trade | |
---|---|---|
Date | ||
2019-06-11 | 287.501678 | buy |
2019-06-12 | 286.994171 | buy |
2019-06-13 | 288.178375 | buy |
2019-06-14 | 287.859955 | buy |
2019-06-17 | 287.969391 | buy |
2019-06-18 | 290.984741 | buy |
2019-06-19 | 291.641541 | buy |
2019-06-20 | 294.427979 | buy |
2019-06-21 | 294.000000 | buy |
2019-06-24 | 293.640015 | buy |
def returns(book):
= 1.0
rtn 'return']=1
book[= 0.0
buy = 0.0
sell for i in book.index:
#long 진입
if book.loc[i, 'trade']=='buy' and book.shift(1).loc[i, 'trade']=='':
= book.loc[i, 'Adj Close']
buy print('진입일 :', i, '\n\t\t long 진입가격:', buy)
#long 청산
elif book.loc[i, 'trade']=='' and book.shift(1).loc[i, 'trade']=='buy':
= book.loc[i, 'Adj Close']
sell = (sell-buy)/buy + 1 # 손익계산
rtn 'return']= rtn
book.loc[i, print('청산일 :', i, '\n\t\t long 진입가격:', buy,
'long 청산가격:', sell, '\n\t\t| return :', round(rtn, 4))
if book.loc[i, 'trade']=='': # 제로 포지션
= sell = 0.0
buy
'acc return'] = book['return'].cumprod()
book[= book['acc return'][-1]
acc_rtn print( 'Accumulated return :', round( acc_rtn, 4))
return( round(acc_rtn, 4) )
returns(book)
진입일 : 2009-01-20 00:00:00
long 진입가격: 65.089966
청산일 : 2009-03-23 00:00:00
long 진입가격: 65.089966 long 청산가격: 66.898392
| return : 1.0278
진입일 : 2010-01-22 00:00:00
long 진입가격: 90.269791
청산일 : 2010-04-14 00:00:00
long 진입가격: 90.269791 long 청산가격: 100.584618
| return : 1.1143
진입일 : 2010-05-04 00:00:00
long 진입가격: 97.538597
청산일 : 2010-10-13 00:00:00
long 진입가격: 97.538597 long 청산가격: 98.862717
| return : 1.0136
진입일 : 2011-03-10 00:00:00
long 진입가격: 109.513054
청산일 : 2011-04-26 00:00:00
long 진입가격: 109.513054 long 청산가격: 114.094101
| return : 1.0418
진입일 : 2011-05-23 00:00:00
long 진입가격: 111.783257
청산일 : 2011-06-30 00:00:00
long 진입가격: 111.783257 long 청산가격: 112.26088
| return : 1.0043
진입일 : 2011-08-02 00:00:00
long 진입가격: 106.748672
청산일 : 2012-02-03 00:00:00
long 진입가격: 106.748672 long 청산가격: 115.768776
| return : 1.0845
진입일 : 2012-04-10 00:00:00
long 진입가격: 117.451515
청산일 : 2012-07-03 00:00:00
long 진입가격: 117.451515 long 청산가격: 119.371857
| return : 1.0164
진입일 : 2012-10-23 00:00:00
long 진입가격: 123.511292
청산일 : 2012-12-18 00:00:00
long 진입가격: 123.511292 long 청산가격: 126.961044
| return : 1.0279
진입일 : 2013-06-05 00:00:00
long 진입가격: 142.477417
청산일 : 2013-07-11 00:00:00
long 진입가격: 142.477417 long 청산가격: 148.711197
| return : 1.0438
진입일 : 2013-08-15 00:00:00
long 진입가격: 147.769791
청산일 : 2013-09-11 00:00:00
long 진입가격: 147.769791 long 청산가격: 150.45195
| return : 1.0182
진입일 : 2013-10-08 00:00:00
long 진입가격: 147.686783
청산일 : 2013-10-17 00:00:00
long 진입가격: 147.686783 long 청산가격: 154.594528
| return : 1.0468
진입일 : 2014-01-24 00:00:00
long 진입가격: 160.521667
청산일 : 2014-04-02 00:00:00
long 진입가격: 160.521667 long 청산가격: 170.233917
| return : 1.0605
진입일 : 2014-04-11 00:00:00
long 진입가격: 163.591492
청산일 : 2014-05-27 00:00:00
long 진입가격: 163.591492 long 청산가격: 172.613312
| return : 1.0551
진입일 : 2014-07-31 00:00:00
long 진입가격: 174.862244
청산일 : 2014-09-18 00:00:00
long 진입가격: 174.862244 long 청산가격: 182.768143
| return : 1.0452
진입일 : 2014-09-25 00:00:00
long 진입가격: 178.636536
청산일 : 2016-04-13 00:00:00
long 진입가격: 178.636536 long 청산가격: 195.218124
| return : 1.0928
진입일 : 2016-06-24 00:00:00
long 진입가격: 191.742584
청산일 : 2016-08-11 00:00:00
long 진입가격: 191.742584 long 청산가격: 206.280853
| return : 1.0758
진입일 : 2016-09-09 00:00:00
long 진입가격: 201.21463
청산일 : 2016-12-07 00:00:00
long 진입가격: 201.21463 long 청산가격: 212.964615
| return : 1.0584
진입일 : 2017-03-21 00:00:00
long 진입가격: 223.89772
청산일 : 2017-04-24 00:00:00
long 진입가격: 223.89772 long 청산가격: 227.193008
| return : 1.0147
진입일 : 2017-07-06 00:00:00
long 진입가격: 231.55455
청산일 : 2017-07-14 00:00:00
long 진입가격: 231.55455 long 청산가격: 236.377182
| return : 1.0208
진입일 : 2017-08-10 00:00:00
long 진입가격: 234.644501
청산일 : 2017-09-11 00:00:00
long 진입가격: 234.644501 long 청산가격: 239.890701
| return : 1.0224
진입일 : 2018-02-05 00:00:00
long 진입가격: 256.626129
청산일 : 2018-05-10 00:00:00
long 진입가격: 256.626129 long 청산가격: 265.551544
| return : 1.0348
진입일 : 2018-06-27 00:00:00
long 진입가격: 264.125763
청산일 : 2018-08-07 00:00:00
long 진입가격: 264.125763 long 청산가격: 280.040985
| return : 1.0603
진입일 : 2018-10-10 00:00:00
long 진입가격: 274.137573
청산일 : 2019-03-18 00:00:00
long 진입가격: 274.137573 long 청산가격: 280.96347
| return : 1.0249
진입일 : 2019-05-13 00:00:00
long 진입가격: 279.50058
Accumulated return : 2.6528
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/1068455384.py:15: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '1.027783483555668' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
book.loc[i, 'return']= rtn
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/1068455384.py:23: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
acc_rtn = book['acc return'][-1]
2.6528
book.tail()
Adj Close | trade | return | acc return | |
---|---|---|---|---|
Date | ||||
2019-06-18 | 290.984741 | buy | 1.0 | 2.652793 |
2019-06-19 | 291.641541 | buy | 1.0 | 2.652793 |
2019-06-20 | 294.427979 | buy | 1.0 | 2.652793 |
2019-06-21 | 294.000000 | buy | 1.0 | 2.652793 |
2019-06-24 | 293.640015 | buy | 1.0 | 2.652793 |
= book['return'].resample('A').aggregate(
result lambda x: (x-1).std() * np.sqrt(252) ])
[np.prod, = ['Return', 'Vol']
result.columns result
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/2886312715.py:1: FutureWarning: 'A' is deprecated and will be removed in a future version, please use 'YE' instead.
result = book['return'].resample('A').aggregate(
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/2886312715.py:1: FutureWarning: The provided callable <function prod at 0x105f46dd0> is currently using SeriesGroupBy.prod. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "prod" instead.
result = book['return'].resample('A').aggregate(
Return | Vol | |
---|---|---|
Date | ||
2009-12-31 | 1.027783 | 0.027783 |
2010-12-31 | 1.129393 | 0.115017 |
2011-12-31 | 1.046283 | 0.042032 |
2012-12-31 | 1.133016 | 0.090658 |
2013-12-31 | 1.112403 | 0.066348 |
2014-12-31 | 1.169581 | 0.093156 |
2015-12-31 | 1.000000 | 0.000000 |
2016-12-31 | 1.244337 | 0.132818 |
2017-12-31 | 1.059011 | 0.033853 |
2018-12-31 | 1.097132 | 0.069591 |
2019-12-31 | 1.024900 | 0.036083 |
'trade']=='buy') &
book[(book['trade'].shift(1)=='')].index.year.value_counts().sort_index() (book[
Date
2009 1
2010 2
2011 3
2012 2
2013 3
2014 4
2016 2
2017 3
2018 3
2019 1
Name: count, dtype: int64
'trade']=='') & (book['trade'].shift(1)=='buy')].head() sample[(book[
Adj Close | center | ub | lb | |
---|---|---|---|---|
Date | ||||
2009-03-23 | 66.898392 | 60.247113 | 66.842427 | 53.651799 |
2010-04-14 | 100.584618 | 97.853165 | 100.219620 | 95.486710 |
2010-10-13 | 98.862717 | 96.240554 | 98.860432 | 93.620676 |
2011-04-26 | 114.094101 | 112.277610 | 114.046010 | 110.509210 |
2011-06-30 | 112.260880 | 109.184084 | 111.624576 | 106.743592 |